import pandas as pd
from shutil import copyfile
import matplotlib.pyplot as plt #for graphs
import pandas as pd #for storing data
import numpy as np #for math operations
import seaborn as sns #for visualisation
import matplotlib.ticker as ticker #to change graph axes
from datetime import date #date manipulation
from sklearn.preprocessing import scale #standardization
First of all, we import the dataset containing all the factors and the stocks that we are going to analyze.
df = pd.read_excel("Euro.xlsx", header = None)
df.index = df[0]
df = df.drop(0, axis =1)
df
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | |||||||||||||||||||||
ABI BB Equity | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Date | 2003-01-31 00:00:00 | 2003-02-28 00:00:00 | 2003-03-31 00:00:00 | 2003-04-30 00:00:00 | 2003-05-30 00:00:00 | 2003-06-30 00:00:00 | 2003-07-31 00:00:00 | 2003-08-29 00:00:00 | 2003-09-30 00:00:00 | 2003-10-31 00:00:00 | ... | 2011-06-30 00:00:00 | 2011-07-29 00:00:00 | 2011-08-31 00:00:00 | 2011-09-30 00:00:00 | 2011-10-31 00:00:00 | 2011-11-30 00:00:00 | 2011-12-30 00:00:00 | 2012-01-31 00:00:00 | 2012-02-29 00:00:00 | 2012-03-30 00:00:00 |
PE_RATIO | 19.5111 | 16.8231 | 19.8095 | 21.8594 | 20.6439 | 19.2732 | 19.3167 | 21.3866 | 21.6364 | 20.2557 | ... | 19.2185 | 19.1443 | 18.3465 | 17.713 | 18.5708 | 19.7267 | 16.705 | 16.5097 | 18.3504 | 19.8999 |
FIVE_YR_AVG_PRICE_EARNINGS | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 18.7176 | 18.7176 | 18.7176 | 18.7176 | 18.7176 | 18.7176 | 18.7709 | 18.7709 | 18.7709 | 18.7709 |
T12M_DIL_PE_CONT_OPS | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 18.836 | 18.7632 | 17.9813 | 17.3605 | 18.2012 | 19.334 | 16.3761 | 16.1847 | 17.9892 | 19.5082 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
NORMALIZED_ROE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5YR_AVG_RETURN_ON_EQUITY | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
CUR_MKT_CAP | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
NORMALIZED_ACCRUALS_BS_METHOD | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
PX_TO_BOOK_RATIO | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
34313 rows × 111 columns
# find all the stocks in the dataset
lst = list(df[df.isnull().all(axis=1)].index)
del lst[2 - 1::2]
stocks = []
for i in lst:
stocks.append(" ".join(i.split(" ")[:-1]))
# save date from columns name
time = df.iloc[1]
The function "get_factor" gets in input a name of a factor, and returns a dataset containing, for each stock, the value of that specific factor along the time span.
df = df.replace(0,np.nan)
def get_factor(name):
sub = df.loc[name]
sub.index = stocks
sub.columns = time
return sub
date_df = get_factor('Date')
We check the assumption 5 ("Assets must have the same time frequency") and remove the assets that have different time frequencies.
date_df = date_df.drop(date_df.columns[0], axis=1)
boole = ~date_df.isnull().any(axis=1).values
# Number of deleted stocks
len(boole) - boole.sum()
163
# Remove the stock with different time series from stock list
stocks = date_df[boole].index
# Chenge the get_factor function in order to remove stock with different time frequency
def get_factor(name):
sub = df.loc[name][boole]
sub.index = stocks
sub.columns = time
return sub
Firstly we removed price and volatility.
tot_factors = ['PE_RATIO', 'FIVE_YR_AVG_PRICE_EARNINGS', 'T12M_DIL_PE_CONT_OPS', '10_YEAR_MOVING_AVERAGE_PE', 'PX_TO_TANG_BV_PER_SH',
'CURRENT_EV_TO_12M_SALES', 'CURRENT_EV_TO_T12M_EBITDA', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'T12M_DIL_EPS_CONT_OPS',
'TRAIL_12M_EBITDA_PER_SHARE', 'TRAIL_12M_SALES_PER_SH', 'NET_DEBT_PER_SHARE', 'TANG_BOOK_VAL_PER_SH',
'NORMALIZED_ACCRUALS_CF_METHOD', 'EBITDA_MARGIN', 'EBITDA_MARGIN_3YR_AVG', 'RSI_14D', 'RSI_30D', 'RSI_9D', 'OPERATING_ROIC',
'EQY_DPS_NET_5YR_GROWTH', 'EQY_REC_CONS', 'BEST_EPS', 'WACC_COST_EQUITY', 'NORMALIZED_ROE', '5YR_AVG_RETURN_ON_EQUITY',
'CUR_MKT_CAP', 'PX_TO_BOOK_RATIO']
nan_factors = []
for factor_name in tot_factors:
factor = get_factor(factor_name)
for col in range(1,len(time),3):
# For a factor to be mantained it has to have at least 50 non-nan stocks
if factor.iloc[:,col].notnull().sum() < 50:
nan_factors.append(factor_name)
break
nan_factors
['T12M_DIL_PE_CONT_OPS', '10_YEAR_MOVING_AVERAGE_PE', 'T12M_DIL_EPS_CONT_OPS', 'BEST_EPS']
tot_factors = [x for x in tot_factors if x not in nan_factors]
In this subsection we are going to make other preliminary operations and create the price multiplicator dataset.
This dataset is going to be used to calculate the return of each stock in our portfolios.
price = get_factor("PX_LAST")
def returns(row):
row = row.astype('float64')
return np.log(row) - np.log(row.shift(1))
log_returns = price.apply(returns, axis = 1)
log_returns.head()
Date | 2003-01-31 | 2003-02-28 | 2003-03-31 | 2003-04-30 | 2003-05-30 | 2003-06-30 | 2003-07-31 | 2003-08-29 | 2003-09-30 | 2003-10-31 | ... | 2011-06-30 | 2011-07-29 | 2011-08-31 | 2011-09-30 | 2011-10-31 | 2011-11-30 | 2011-12-30 | 2012-01-31 | 2012-02-29 | 2012-03-30 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ABI BB | NaN | -0.153347 | 0.152804 | 0.074728 | -0.109818 | 0.076775 | 0.024005 | 0.125930 | -0.050171 | -0.059747 | ... | -0.046884 | 0.004988 | -0.044508 | 0.033880 | 0.011001 | 0.096745 | 0.065631 | -0.020287 | 0.084059 | 0.082937 |
FP FP | NaN | -0.024176 | -0.055337 | 0.012849 | 0.057868 | 0.055463 | 0.001517 | 0.058213 | -0.075046 | 0.031146 | ... | -0.003754 | -0.054095 | -0.105566 | -0.022610 | 0.129102 | 0.013657 | 0.029937 | 0.022653 | 0.038478 | -0.093549 |
UNA NA | NaN | 0.005720 | 0.036402 | 0.034241 | -0.127350 | -0.061837 | 0.075821 | 0.005935 | -0.003955 | -0.010547 | ... | -0.003312 | 0.001989 | 0.039603 | 0.008030 | 0.049882 | 0.012932 | 0.049176 | -0.043067 | -0.020644 | 0.023195 |
SAN FP | NaN | 0.019742 | -0.073364 | 0.147717 | 0.016698 | -0.063619 | -0.017805 | 0.021719 | 0.019343 | 0.019915 | ... | 0.007605 | -0.020961 | -0.069203 | -0.026199 | 0.051536 | 0.002499 | 0.085683 | -0.004946 | -0.017146 | 0.047838 |
ENI IM | NaN | -0.024254 | -0.116373 | 0.045515 | 0.070347 | -0.040761 | -0.004947 | 0.048619 | -0.047704 | 0.040117 | ... | -0.021232 | -0.071800 | -0.080921 | -0.057326 | 0.190233 | -0.019578 | 0.020828 | 0.054100 | 0.023971 | 0.016046 |
5 rows × 111 columns
def multiplicator(row):
return row/row.shift(1)
price_mul = price.apply(multiplicator, axis=1)
In this section we are going to build the portfolio corresponding to our benchmark. To do so, we are going to build an equal weight portfolio, with all the stocks.
# We initialize an empty dataframe
sub = pd.DataFrame(columns=time, index = stocks)
i=2
# Remove the transaction cost in the first iteration of wealth
wealth = 600000 * 0.998
# To calculate the benchmark we select only the stocks that have no null information in the price df
sel_stocks = price[~price.isnull().any(axis=1)].index
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[1]] = wealth/len(sel_stocks)
# Calculate the value of the portfolio at each iteration of time
while i < len(time):
sub.iloc[:,i] = sub.iloc[:,i-1] * price_mul.iloc[:,i]
i += 1
# Build output dataframe
bench = pd.DataFrame(columns=time[1:])
# Insert value information
bench.loc['value'] = sub.sum()[1:]
# Insert return information
bench.loc['returns'] = (bench.loc['value'].astype('float') - bench.loc['value'].shift(1).astype('float'))/bench.loc['value'].shift(1).astype('float')
bench.loc['cum_returns'] = (bench.loc['value'].astype('float') - bench.loc['value'][1])/bench.loc['value'][1]
bench.loc['log_returns'] = np.log(bench.loc['value'].astype('float')) - np.log(bench.loc['value'].shift(1).astype('float'))
benchmark = bench
benchmark
Date | 2003-02-28 | 2003-03-31 | 2003-04-30 | 2003-05-30 | 2003-06-30 | 2003-07-31 | 2003-08-29 | 2003-09-30 | 2003-10-31 | 2003-11-28 | ... | 2011-06-30 | 2011-07-29 | 2011-08-31 | 2011-09-30 | 2011-10-31 | 2011-11-30 | 2011-12-30 | 2012-01-31 | 2012-02-29 | 2012-03-30 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
value | 598800.0 | 587425.533415 | 660207.337435 | 692876.772973 | 731648.311379 | 773002.994227 | 821343.08105 | 823181.517205 | 900586.823293 | 929618.039072 | ... | 2302978.120544 | 2203851.081235 | 2021796.168429 | 1872358.936239 | 1986169.576593 | 1907954.125365 | 1919944.106303 | 2045750.950641 | 2142830.946715 | 2152527.775027 |
returns | NaN | -0.018995 | 0.1239 | 0.049484 | 0.055957 | 0.056523 | 0.062535 | 0.002238 | 0.094032 | 0.032236 | ... | -0.017381 | -0.043043 | -0.082608 | -0.073913 | 0.060785 | -0.03938 | 0.006284 | 0.065526 | 0.047454 | 0.004525 |
cum_returns | 0.019363 | 0.0 | 0.1239 | 0.179514 | 0.245517 | 0.315917 | 0.398208 | 0.401338 | 0.533108 | 0.582529 | ... | 2.92046 | 2.751711 | 2.441791 | 2.187398 | 2.381143 | 2.247993 | 2.268404 | 2.482571 | 2.647834 | 2.664342 |
log_returns | NaN | -0.019178 | 0.116804 | 0.048298 | 0.054448 | 0.054983 | 0.060658 | 0.002236 | 0.08987 | 0.031727 | ... | -0.017533 | -0.043997 | -0.08622 | -0.076787 | 0.059009 | -0.040176 | 0.006265 | 0.063469 | 0.046363 | 0.004515 |
4 rows × 110 columns
plt.figure().set_figwidth(12)
plt.subplot(1,2,1)
benchmark.loc['log_returns'].plot(label = 'Benchmark')
plt.title("Benchmark log returns")
plt.ylabel("log return")
plt.grid()
plt.subplot(1,2,2)
benchmark.loc['value'].plot(label = 'Benchmark')
plt.title("Benchmark value")
plt.ylabel("value (in million)")
plt.grid()
plt.show()
We are now going to build the function to calculate the univariate screening portfolios. The function takes in input:
The function return a portfolio dataset that has, for every month, the information about the value of the portfolio, its return, its cumulative return and its logarithmic returns.
def uniPortfolio(data, holding, worst = False, wealth = 600000, n_stocks=15, INtrans_cost = 0.002):
# Initiate dataset
sub = pd.DataFrame(columns=time, index = stocks)
# Set i = 1, we want to start at month 2, so that we can use the information of the factor at month 1
i=1
# Remove the transaction cost in the first iteration of wealth
wealth = wealth * (1 - INtrans_cost)
# Loop through i. i is going to represent the month in which the selected stocks are modified based on the factor
while i < len(time):
# If we are not in the first month, we have to compute the portfolio value in the new month before reallocation
if i != 1:
portfT = sub.iloc[:,i-1] * price_mul.iloc[:,i]
# Overwrite the wealth as the portfolio total value
wealth = portfT.sum()
# Create a copy of the factor
factor = data
# From this new copy, select only the rows of the stocks for which we have the price_mul information
if i+holding < len(time):
factor = factor.loc[price_mul.iloc[:,range(i,i+ holding)].notnull().all(axis=1),:]
else:
factor = factor.loc[price_mul.iloc[:,i:].notnull().all(axis=1),:]
# Select stocks based on factor
sel_stocks = factor.sort_values(factor.columns[i-1], ascending=worst).index[:n_stocks]
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/n_stocks
# If we are not in the first month, than we have to compute the transaction costs
if i != 1:
# We compute the transaction costs based on the difference between the portfolio before and after reallocation
trans_cost = abs(sub.iloc[:,i] - portfT).sum() * INtrans_cost
# Remove the transaction cost from total wealth
wealth = wealth - trans_cost
# Overwrite allocation of wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/n_stocks
# J is going to loop through the months in which there is no reallocation
for j in range(1,holding):
# Break the j loop if the months are finished
if i + j > len(time)-1:
break
# Compute the value of all the stocks at time t as their value at time t-1 times theprice multiplicator at time t
sub.iloc[:,i+j] = sub.iloc[:,i+j-1] * price_mul.iloc[:,i+j]
# Increment i by the holding period before next iteration
i += holding
# Since we started from i=1, the first element of the subset is going to be 0, so we are going to remove it
# Build output dataframe
portf = pd.DataFrame(columns=time[1:])
# Insert value information
portf.loc['value'] = sub.sum()[1:]
# Insert return information
portf.loc['returns'] = (portf.loc['value'].astype('float') - portf.loc['value'].shift(1).astype('float'))/portf.loc['value'].shift(1).astype('float')
portf.loc['cum_returns'] = (portf.loc['value'].astype('float') - portf.loc['value'][0])/portf.loc['value'][0]
portf.loc['log_returns'] = np.log(portf.loc['value'].astype('float')) - np.log(portf.loc['value'].shift(1).astype('float'))
return portf
We are now going to build the Information Ratio metric in order to order the best univariate portfolios. In this way, we are going to understand which are the factors that lead to a better univariate screening portfolio, and use these in the sequantial and simultaneous models.
def IR(port):
num2 = benchmark.loc['log_returns']
num1 = port.loc['log_returns']
num = (num1 - num2).mean() * 12
den = np.std(port.loc['log_returns'] - benchmark.loc['log_returns']) * np.sqrt(12)
return num/den
To represent the Risk Free asset we used the 13 weekes Treasury Bill.
# Import the data from yahoo finance
import yfinance as yf
IRX = yf.download('^IRX','2002-12-31','2012-03-30', interval='1mo')
IRX = IRX['Close']
[*********************100%***********************] 1 of 1 completed
IRX = pd.DataFrame(IRX).transpose()
IRX.index = ["value"]
IRX.columns = time
IRX
Date | 2003-01-31 | 2003-02-28 | 2003-03-31 | 2003-04-30 | 2003-05-30 | 2003-06-30 | 2003-07-31 | 2003-08-29 | 2003-09-30 | 2003-10-31 | ... | 2011-06-30 | 2011-07-29 | 2011-08-31 | 2011-09-30 | 2011-10-31 | 2011-11-30 | 2011-12-30 | 2012-01-31 | 2012-02-29 | 2012-03-30 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
value | 1.148 | 1.175 | 1.09 | 1.101 | 1.085 | 0.838 | 0.927 | 0.96 | 0.926 | 0.932 | ... | 0.015 | 0.09 | 0.015 | 0.01 | 0.005 | 0.015 | 0.005 | 0.055 | 0.075 | 0.065 |
1 rows × 111 columns
IRX.loc['log_returns'] = np.log(IRX.loc['value'].astype('float')) - np.log(IRX.loc['value'].shift(1).astype('float'))
def Sharpe(port):
num2 = IRX.loc['log_returns'][1:]
num1 = port.loc['log_returns']
num = (num1 - num2).mean()
den = np.std(port.loc['log_returns'])
return num/den
def Sortino(port):
num2 = IRX.loc['log_returns'][1:]
num1 = port.loc['log_returns']
num = (num1 - num2).mean()
den = np.std(port.loc['log_returns', port.loc['log_returns'] < port.loc['log_returns'].mean()])
return num/den
In this section we are going to use the just computed metrics in order to decide the best parameters for the univariate screening function.
First of all, we are going to understand, for each factor, which is the better performing portfolio, between the one built using the stocks with higher values of the factor and the one built using the stocks with lower values.
# We are going to store the data in a list of two dictionary
comparison = [{},{}]
worst_list = [False, True]
# We are going to use the average of this four different holding periods
hold_list = [3,6,12,111]
# The first dictionary is going to contain the metrics relative to the portfolio that use the stocks
# with higher value of the factor. The second will use the lower.
for worst in range(len(worst_list)):
# Loop through the factor
for factor in tot_factors:
# get the data of the factor
sub = get_factor(factor)
lst = []
for i in range(4):
hold = hold_list[i]
port = uniPortfolio(sub, hold, n_stocks=100, worst = worst_list[worst], INtrans_cost = 0)
lst.append(IR(port))
# The resulting IR of the portfolio is computed as the average IR between the different holding periods
comparison[worst][factor] = sum(lst) / len(lst)
X_axis = np.arange(len(comparison[0]))
plt.bar(X_axis -0.2, comparison[0].values(), 0.4, label = 'Top')
plt.bar(X_axis +0.2, comparison[1].values(), 0.4, label = 'Bottom')
plt.axhline(y = 0, color = 'black')
plt.xticks(X_axis, comparison[0].keys(), rotation = 90)
plt.xlabel("Factors")
plt.ylabel("Information Ratio")
plt.title("Average IR of the Top and Bottom portfolios for each factor")
plt.legend()
plt.show()
From the resulting plot we can see that the factor "current market cap" (using the assets with lower values) seems to achieve unusually high values of IR.
We now store this information in a new dictionary. The dictionary diz_factors will have, for each factor, the information relative to which kind of stocks have to be taken (top or bottom).
diz_factors = {}
for el in range(len(comparison[0])):
diz_factors[list(comparison[0].keys())[el]] = list(comparison[0].values())[el] < list(comparison[1].values())[el]
diz_factors
{'PE_RATIO': True, 'FIVE_YR_AVG_PRICE_EARNINGS': True, 'PX_TO_TANG_BV_PER_SH': True, 'CURRENT_EV_TO_12M_SALES': True, 'CURRENT_EV_TO_T12M_EBITDA': True, 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA': True, 'TRAIL_12M_EBITDA_PER_SHARE': True, 'TRAIL_12M_SALES_PER_SH': True, 'NET_DEBT_PER_SHARE': True, 'TANG_BOOK_VAL_PER_SH': True, 'NORMALIZED_ACCRUALS_CF_METHOD': True, 'EBITDA_MARGIN': True, 'EBITDA_MARGIN_3YR_AVG': True, 'RSI_14D': True, 'RSI_30D': True, 'RSI_9D': False, 'OPERATING_ROIC': False, 'EQY_DPS_NET_5YR_GROWTH': True, 'EQY_REC_CONS': False, 'WACC_COST_EQUITY': True, 'NORMALIZED_ROE': False, '5YR_AVG_RETURN_ON_EQUITY': True, 'CUR_MKT_CAP': True, 'PX_TO_BOOK_RATIO': True}
We are now going to choose the best holding period based on the average information ratio that its portfolios achieve.
# The information is goint to be stored in a list of 4 dictionaries
hold_comparison = [{},{},{},{}]
# hold_list contains the holding periods we are going to compare
hold_list = [3,6,12,111]
# For each holding period and for each factor, compute the resulting IR of the portfolio
for i in range(4):
hold = hold_list[i]
for factor in diz_factors.keys():
sub = get_factor(factor)
port = uniPortfolio(sub, hold, n_stocks=15, worst = diz_factors[factor])
hold_comparison[i][factor + "_" + str(hold)] = IR(port)
lst_holdComp = []
# We store the value information in a list containing 4 lists
for i in range(4):
lst_holdComp.append(list(hold_comparison[i].values()))
import warnings
warnings.filterwarnings("ignore")
fig = plt.figure(figsize =(7, 4))
# Creating axes instance
ax = fig.add_axes([0, 0, 1, 1])
ax.set_xticklabels(hold_list)
# Creating plot
bp = ax.boxplot(lst_holdComp, showmeans=True)
plt.axhline(y = 0, color = 'black')
plt.title("Boxplot of the IR achieved for every holding period")
plt.ylabel("Information Ratio")
plt.xlabel("Holding Period")
plt.show()
There are no statistically significant differences between the different holding period. Though, since the 12 month box has an higher mean, from now on we are going to consider this holding period. The outlier present in each of the first three holding period is the factor "current market cap".
We are now going to to build a portfolio for each factor and evaluate their metrics. In this way we are going to understand which are the factors that lead to a better univariate screening portfolio, and use these to build the following strategies.
# Initialize an empty rank dataframe
rank = pd.DataFrame(columns = ['Information', 'Sharpe', 'Sortino'], index = list(diz_factors.keys()))
# For each factor, build a portfolio and insert in the dataframe its resulting metrics
for factor in list(diz_factors.keys()):
port = uniPortfolio(get_factor(factor), 12, n_stocks=15, worst=diz_factors[factor])
rank.loc[factor, 'Information'] = IR(port)
rank.loc[factor, 'Sharpe'] = Sharpe(port)
rank.loc[factor, 'Sortino'] = Sortino(port)
# Order the resulting portfolios by their resulting Information Ratio
rank = rank.sort_values(by='Information', ascending=False)
rank
Information | Sharpe | Sortino | |
---|---|---|---|
CUR_MKT_CAP | 2.572272 | 0.964028 | 1.774545 |
PE_RATIO | 1.268581 | 0.775805 | 0.982374 |
CURRENT_EV_TO_12M_SALES | 1.190999 | 0.694106 | 0.894214 |
FIVE_YEAR_AVG_EV_TO_T12_EBITDA | 1.158677 | 0.756905 | 0.924165 |
CURRENT_EV_TO_T12M_EBITDA | 1.107566 | 0.72325 | 0.874313 |
PX_TO_BOOK_RATIO | 0.890569 | 0.567376 | 1.038024 |
NORMALIZED_ACCRUALS_CF_METHOD | 0.887866 | 0.667879 | 1.106075 |
5YR_AVG_RETURN_ON_EQUITY | 0.852702 | 0.560658 | 0.861664 |
TRAIL_12M_EBITDA_PER_SHARE | 0.735485 | 0.576269 | 1.07594 |
EBITDA_MARGIN_3YR_AVG | 0.668972 | 0.583426 | 0.969576 |
TRAIL_12M_SALES_PER_SH | 0.597328 | 0.588323 | 0.883077 |
EBITDA_MARGIN | 0.578179 | 0.545594 | 1.023334 |
RSI_9D | 0.532972 | 0.850518 | 1.085326 |
PX_TO_TANG_BV_PER_SH | 0.488429 | 0.584849 | 0.87474 |
RSI_30D | 0.453949 | 0.516281 | 0.768311 |
EQY_DPS_NET_5YR_GROWTH | 0.388422 | 0.704911 | 1.085478 |
FIVE_YR_AVG_PRICE_EARNINGS | 0.334923 | 0.6265 | 0.7239 |
RSI_14D | 0.306011 | 0.482653 | 0.85147 |
NET_DEBT_PER_SHARE | 0.142135 | 0.846456 | 1.029318 |
WACC_COST_EQUITY | -0.047826 | 0.889192 | 1.347257 |
OPERATING_ROIC | -0.212811 | 0.722881 | 0.958099 |
EQY_REC_CONS | -0.225417 | 0.702182 | 0.759324 |
NORMALIZED_ROE | -0.287531 | 0.62834 | 0.76751 |
TANG_BOOK_VAL_PER_SH | -0.93403 | 0.45706 | 0.514472 |
plt.figure(figsize =(10, 6))
benchmark.loc['value'].plot(label = 'benchmark', color='black')
uniPortfolio(get_factor('CUR_MKT_CAP'), 12, worst=diz_factors['CUR_MKT_CAP']).loc['value'].plot(label = 'CUR_MKT_CAP', color='red')
uniPortfolio(get_factor('PE_RATIO'), 12, worst=diz_factors['PE_RATIO']).loc['value'].plot(label = 'PE_RATIO')
uniPortfolio(get_factor('CURRENT_EV_TO_12M_SALES'), 12, worst=diz_factors['CURRENT_EV_TO_12M_SALES']).loc['value'].plot(label = 'CURRENT_EV_TO_12M_SALES')
uniPortfolio(get_factor('FIVE_YEAR_AVG_EV_TO_T12_EBITDA'), 12, worst=diz_factors['FIVE_YEAR_AVG_EV_TO_T12_EBITDA']).loc['value'].plot(label = 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA')
uniPortfolio(get_factor('CURRENT_EV_TO_T12M_EBITDA'), 12, worst=diz_factors['CURRENT_EV_TO_T12M_EBITDA']).loc['value'].plot(label = 'CURRENT_EV_TO_T12M_EBITDA')
plt.title("Current Market Cap vs other 4 best univariate portfolios")
plt.ylabel("value (in tens of millions)")
plt.grid()
plt.legend()
plt.show()
We now try to understand better this unusual returns of the portfolio built with the "Current Market Cap" factor. To do so, we build a portfolio with only one stock per year, and look for unusual returns.
plt.figure().set_figwidth(12)
plt.subplot(1,2,1)
benchmark.loc['log_returns'].plot(color = 'black', label='benchmark')
uniPortfolio(get_factor('CUR_MKT_CAP'), 12, worst=diz_factors['CUR_MKT_CAP'], n_stocks=1).loc['log_returns'].plot(color='red')
plt.axvspan(455, 458, color="blue", alpha=0.2)
plt.axvspan(493, 498, color="blue", alpha=0.2)
plt.ylabel("log returns")
plt.grid()
plt.subplot(1,2,2)
benchmark.loc['value'].plot(color = 'black', label='benchmark')
uniPortfolio(get_factor('CUR_MKT_CAP'), 12, worst=diz_factors['CUR_MKT_CAP'], n_stocks=1).loc['value'].plot(color='red', label='CurMktCap')
plt.grid()
plt.ylabel("Value (in tens of millions)")
plt.axvspan(455, 458, color="blue", alpha=0.2)
plt.axvspan(493, 498, color="blue", alpha=0.2)
plt.suptitle("Cur Mkt Cap with one asset")
plt.show()
The highest peaks in returns are achieved in the first months of 2008 and 2011
We are now going to check, for each year, which asset is bought, in order to understand which are the stocks that cause the problem.
factor = get_factor('CUR_MKT_CAP')
year = 2003
for col in list(range(0,111,12)):
non_nan_stocks = price.iloc[:,col:col+11][~price.iloc[:,col:col+11].isnull().any(axis=1)].index
print(year, ": ", list(factor.loc[non_nan_stocks].iloc[:,col].sort_values(ascending=True).index)[0])
year += 1
2003 : WDI GR 2004 : H4G GR 2005 : ABE1 GR 2006 : LCA1 GR 2007 : MSU GR 2008 : EUZ GR 2009 : SIS GR 2010 : SIS GR 2011 : PRC FP 2012 : FNM IM
The stocks causing the problems are those bought in year 2007 and 2011: 'MSU GR' and 'PRC FP'. We plot the prices below:
plt.figure(figsize =(10, 6))
price.loc['MSU GR'].plot(label = 'MSU GR')
price.loc['PRC FP'].plot(label = 'PRC FP')
plt.axvspan(455, 458, color="blue", alpha=0.2)
plt.axvspan(493, 498, color="blue", alpha=0.2)
plt.ylabel("price")
plt.title("prices of MSU GRU and PRC FP")
plt.legend()
plt.grid()
plt.show()
February 2008: MSU GR starts from a price of 75 and gets to a price of 125 (+66%).
April 2011: PRC FP starts from a price of 8.1 and gets to a price of 26.99 (+233%).
Considered the problems brought by the factor "Current Market Cap", we decided to remove the portfolios built with this factor, since they achieve unrealistic results. From now on, we will not consider Current Market Cap as a factor to build portfolios.
We are now going to plot the values of the 5 best univariate portfolios without considering the Current Market Cap.
uni_pe = uniPortfolio(get_factor('PE_RATIO'), 12, worst=diz_factors['PE_RATIO'])
uni_evtosales = uniPortfolio(get_factor('CURRENT_EV_TO_12M_SALES'), 12, worst=diz_factors['CURRENT_EV_TO_12M_SALES'])
uni_avgEbitda = uniPortfolio(get_factor('FIVE_YEAR_AVG_EV_TO_T12_EBITDA'), 12, worst=diz_factors['FIVE_YEAR_AVG_EV_TO_T12_EBITDA'])
uni_currEbitda = uniPortfolio(get_factor('CURRENT_EV_TO_T12M_EBITDA'), 12, worst=diz_factors['CURRENT_EV_TO_T12M_EBITDA'])
uni_pxToBook = uniPortfolio(get_factor('PX_TO_BOOK_RATIO'), 12, worst=diz_factors['PX_TO_BOOK_RATIO'])
plt.figure(figsize =(10, 6))
benchmark.loc['value'].plot(label = 'benchmark', color='black')
uni_pe.loc['value'].plot(label = 'PE_RATIO', color='C0')
uni_evtosales.loc['value'].plot(label = 'CURRENT_EV_TO_12M_SALES', color='C1')
uni_avgEbitda.loc['value'].plot(label = 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', color='C2')
uni_currEbitda.loc['value'].plot(label = 'CURRENT_EV_TO_T12M_EBITDA', color='C3')
uni_pxToBook.loc['value'].plot(label = 'PX_TO_BOOK_RATIO', color='C4')
plt.text(508, uni_pe.loc['value'][-1], 'IR: ' + str(round(IR(uni_pe),2)), color='C0')
plt.text(508, uni_evtosales.loc['value'][-1], 'IR: ' + str(round(IR(uni_evtosales),2)), color='C1')
plt.text(508, uni_avgEbitda.loc['value'][-1] +200000, 'IR: ' + str(round(IR(uni_avgEbitda),2)), color='C2')
plt.text(508, uni_currEbitda.loc['value'][-1] -200000, 'IR: ' + str(round(IR(uni_currEbitda),2)), color='C3')
plt.text(508, uni_pxToBook.loc['value'][-1], 'IR: ' + str(round(IR(uni_pxToBook),2)), color='C4')
plt.title("Comparison between the 5 best univariate portfolios")
plt.ylabel("value (in tens of millions)")
plt.legend()
plt.grid()
plt.show()
We are now going to write a function to build a sequential portfolio. The screening portfolios are going to be built using three factors. On each of the first two screening phases, the best 30% stocks are selected by the function. In the last screening phase the function selects the best 15 stocks. The holding period is still set to 12.
def seqPortfolio(data, holding, worst = None, wealth = 600000):
# build the "worst" list
if worst is None:
worst = []
for factor in range(len(data)):
worst.append(diz_factors[data[factor]])
# Initiate dataset
sub = pd.DataFrame(columns=time, index = stocks)
# Set i = 1, we want to start at month 2, so that we can use the information of the factor at month 1
i=1
# Remove the transaction cost in the first iteration of wealth
wealth = wealth * 0.998
# Loop through i. i is going to represent the month in which the selected stocks are modified based on the factor
while i < len(time):
# If we are not in the first month, we have to compute the portfolio value in the new month before reallocation
if i != 1:
portfT = sub.iloc[:,i-1] * price_mul.iloc[:,i]
# Overwrite the wealth as the portfolio total value
wealth = portfT.sum()
# Create a copy of the factors
factor1 = get_factor(data[0])
factor2 = get_factor(data[1])
factor3 = get_factor(data[2])
# From this new copy, select only the rows of the stocks for which we have the price_mul information
factor1 = factor1.loc[price_mul.iloc[:,i].notnull(),:]
# Select stocks based on factor
sel_stocks = factor1.sort_values(factor1.columns[i-1], ascending=worst[0]).index[:round(len(factor1)*0.3)]
factor2 = factor2.loc[sel_stocks]
sel_stocks = factor2.sort_values(factor2.columns[i-1], ascending=worst[1]).index[:round(len(factor2)*0.3)]
factor3 = factor3.loc[sel_stocks]
sel_stocks = factor3.sort_values(factor3.columns[i-1], ascending=worst[2]).index[:15]
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# If we are not in the first month, than we have to compute the transaction costs
if i != 1:
# We compute the transaction costs based on the difference between the portfolio before and after reallocation
trans_cost = abs(sub.iloc[:,i] - portfT).sum() * 0.002
# Remove the transaction cost from total wealth
wealth = wealth - trans_cost
# Overwrite allocation of wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# J is going to loop through the months in which there is no reallocation
for j in range(1,holding):
# Break the j loop if the months are finished
if i + j > len(time)-1:
break
# Compute the value of all the stocks at time t as their value at time t-1 times theprice multiplicator at time t
sub.iloc[:,i+j] = sub.iloc[:,i+j-1] * price_mul.iloc[:,i+j]
# Increment i by the holding period before next iteration
i += holding
# Since we started from i=1, the first element of the subset is going to be 0, so we are going to remove it
# Build output dataframe
portf = pd.DataFrame(columns=time[1:])
# Insert value information
portf.loc['value'] = sub.sum()[1:]
# Insert return information
portf.loc['returns'] = (portf.loc['value'].astype('float') - portf.loc['value'].shift(1).astype('float'))/portf.loc['value'].shift(1).astype('float')
portf.loc['cum_returns'] = (portf.loc['value'].astype('float') - portf.loc['value'][0])/portf.loc['value'][0]
portf.loc['log_returns'] = np.log(portf.loc['value'].astype('float')) - np.log(portf.loc['value'].shift(1).astype('float'))
return portf
The first portfolio is created by 3 sequential screening over the three best performing factors from the univariate strategy.
portf_seq_top3 = seqPortfolio(['PE_RATIO','CURRENT_EV_TO_12M_SALES','FIVE_YEAR_AVG_EV_TO_T12_EBITDA'], 12)
portf_seq_top3
Date | 2003-02-28 | 2003-03-31 | 2003-04-30 | 2003-05-30 | 2003-06-30 | 2003-07-31 | 2003-08-29 | 2003-09-30 | 2003-10-31 | 2003-11-28 | ... | 2011-06-30 | 2011-07-29 | 2011-08-31 | 2011-09-30 | 2011-10-31 | 2011-11-30 | 2011-12-30 | 2012-01-31 | 2012-02-29 | 2012-03-30 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
value | 598800.0 | 604790.262009 | 661637.714469 | 713975.581809 | 737923.832265 | 787567.109524 | 843784.158616 | 846232.486133 | 956529.571639 | 962561.761368 | ... | 6194101.956307 | 6030898.120106 | 5520898.077803 | 5091280.32388 | 5350267.86917 | 4957809.266168 | 4945702.428624 | 5252104.850658 | 5554369.514622 | 5807879.492871 |
returns | NaN | 0.010004 | 0.093995 | 0.079104 | 0.033542 | 0.067274 | 0.071381 | 0.002902 | 0.130339 | 0.006306 | ... | -0.022299 | -0.026348 | -0.084565 | -0.077817 | 0.050869 | -0.073353 | -0.002442 | 0.061953 | 0.057551 | 0.045642 |
cum_returns | 0.0 | 0.010004 | 0.104939 | 0.192344 | 0.232338 | 0.315242 | 0.409125 | 0.413214 | 0.597411 | 0.607485 | ... | 9.344192 | 9.07164 | 8.219937 | 7.502472 | 7.934983 | 7.279575 | 7.259356 | 7.77105 | 8.275834 | 8.699198 |
log_returns | NaN | 0.009954 | 0.089836 | 0.076131 | 0.032992 | 0.065108 | 0.068948 | 0.002897 | 0.122518 | 0.006287 | ... | -0.022552 | -0.026702 | -0.088355 | -0.081011 | 0.049617 | -0.076183 | -0.002445 | 0.06011 | 0.055956 | 0.044631 |
4 rows × 110 columns
The second portfolio is created in the same way as the first, but the three screening are inverted. Therefore, the first screening is done with the third best factor, the second with the second best factor, and the third with the first best univariate factor.
portf_seq_top3_inv = seqPortfolio(['FIVE_YEAR_AVG_EV_TO_T12_EBITDA','CURRENT_EV_TO_12M_SALES','PE_RATIO'], 12)
portf_seq_top3
Date | 2003-02-28 | 2003-03-31 | 2003-04-30 | 2003-05-30 | 2003-06-30 | 2003-07-31 | 2003-08-29 | 2003-09-30 | 2003-10-31 | 2003-11-28 | ... | 2011-06-30 | 2011-07-29 | 2011-08-31 | 2011-09-30 | 2011-10-31 | 2011-11-30 | 2011-12-30 | 2012-01-31 | 2012-02-29 | 2012-03-30 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
value | 598800.0 | 604790.262009 | 661637.714469 | 713975.581809 | 737923.832265 | 787567.109524 | 843784.158616 | 846232.486133 | 956529.571639 | 962561.761368 | ... | 6194101.956307 | 6030898.120106 | 5520898.077803 | 5091280.32388 | 5350267.86917 | 4957809.266168 | 4945702.428624 | 5252104.850658 | 5554369.514622 | 5807879.492871 |
returns | NaN | 0.010004 | 0.093995 | 0.079104 | 0.033542 | 0.067274 | 0.071381 | 0.002902 | 0.130339 | 0.006306 | ... | -0.022299 | -0.026348 | -0.084565 | -0.077817 | 0.050869 | -0.073353 | -0.002442 | 0.061953 | 0.057551 | 0.045642 |
cum_returns | 0.0 | 0.010004 | 0.104939 | 0.192344 | 0.232338 | 0.315242 | 0.409125 | 0.413214 | 0.597411 | 0.607485 | ... | 9.344192 | 9.07164 | 8.219937 | 7.502472 | 7.934983 | 7.279575 | 7.259356 | 7.77105 | 8.275834 | 8.699198 |
log_returns | NaN | 0.009954 | 0.089836 | 0.076131 | 0.032992 | 0.065108 | 0.068948 | 0.002897 | 0.122518 | 0.006287 | ... | -0.022552 | -0.026702 | -0.088355 | -0.081011 | 0.049617 | -0.076183 | -0.002445 | 0.06011 | 0.055956 | 0.044631 |
4 rows × 110 columns
plt.figure(figsize =(10, 6))
uni_pe.loc['value'].plot(label = 'PE_RATIO', color='C0', linestyle = 'dashed')
portf_seq_top3.loc['value'].plot(label = 'Portfolio Top3', color='C1')
portf_seq_top3_inv.loc['value'].plot(label = 'Portfolio Top3 Inv', color='C2')
benchmark.loc['value'].plot(label = 'Benchmark', color = 'black')
plt.grid()
plt.title("Comparison between sequential portfolios")
plt.ylabel("Value (in millions)")
plt.text(508, uni_pe.loc['value'][-1], 'IR: ' + str(round(IR(uni_pe),2)), color='C0')
plt.text(508, portf_seq_top3.loc['value'][-1], 'IR: ' + str(round(IR(portf_seq_top3),2)), color='C1')
plt.text(508, portf_seq_top3_inv.loc['value'][-1], 'IR: ' + str(round(IR(portf_seq_top3_inv),2)), color='C2')
plt.legend()
plt.show()
# Initialize an empty rank dataframe
rank_seq = pd.DataFrame(columns = ['Information', 'Sharpe', 'Sortino'],
index = ['portf_seq_top3', 'portf_seq_top3_inv'])
# For each portfolio insert in the dataframe its resulting metrics
port_list = [portf_seq_top3, portf_seq_top3_inv]
for i in range(2):
rank_seq.iloc[i]['Information'] = IR(port_list[i])
rank_seq.iloc[i]['Sharpe'] = Sharpe(port_list[i])
rank_seq.iloc[i]['Sortino'] = Sortino(port_list[i])
# Order the resulting portfolios by their resulting Information Ratio
rank_seq
Information | Sharpe | Sortino | |
---|---|---|---|
portf_seq_top3 | 1.092642 | 0.729568 | 0.895137 |
portf_seq_top3_inv | 0.841387 | 0.657162 | 0.803223 |
The portfolio built with the top 3 factors is the one achieving higher performances on all the available metrics. Though, none of the two sequential portfolio seem to achieve as high as the best univariate portfolio (built using the pe ratio).
We are now going to write a function to build a simultaneous portfolio.
def simPortfolio(data, holding, wealth = 600000, n_stocks=15):
# Initialize worst list
worst = []
for factor in range(len(data)):
worst.append(diz_factors[data[factor]])
# Initiate dataset
sub = pd.DataFrame(columns=time, index = stocks)
# Set i = 1, we want to start at month 2, so that we can use the information of the factor at month 1
i=1
# Remove the transaction cost in the first iteration of wealth
wealth = wealth * 0.998
# Loop through i. i is going to represent the month in which the selected stocks are modified based on the factor
while i < len(time):
# If we are not in the first month, we have to compute the portfolio value in the new month before reallocation
if i != 1:
portfT = sub.iloc[:,i-1] * price_mul.iloc[:,i]
# Overwrite the wealth as the portfolio total value
wealth = portfT.sum()
# Create a copy of the factors
fac = []
for el in range(len(data)):
fac.append(get_factor(data[el]).loc[price_mul.iloc[:,i].notnull(),:])
# Initialize a df that is going to contain the z scores
sub_factor = pd.DataFrame(columns=range(len(data)+1), index = fac[0].index)
for el in range(len(data)):
# Scale every column
new_col = scale(fac[el].iloc[:,i-1])
# If we have to get the bottom value stocks, we multiplicate the corresponding column for -1
if worst[el]:
new_col = new_col * -1
sub_factor[el] = new_col
# The last column of the df contains the average of the z score
sub_factor.iloc[:,-1] = sub_factor.mean(axis=1)
# We select the best stocks based on this last column
sel_stocks = sub_factor.sort_values(sub_factor.columns[-1], ascending=False).index[:n_stocks]
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# If we are not in the first month, than we have to compute the transaction costs
if i != 1:
# We compute the transaction costs based on the difference between the portfolio before and after reallocation
trans_cost = abs(sub.iloc[:,i] - portfT).sum() * 0.002
# Remove the transaction cost from total wealth
wealth = wealth - trans_cost
# Overwrite allocation of wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# J is going to loop through the months in which there is no reallocation
for j in range(1,holding):
# Break the j loop if the months are finished
if i + j > len(time)-1:
break
# Compute the value of all the stocks at time t as their value at time t-1 times theprice multiplicator at time t
sub.iloc[:,i+j] = sub.iloc[:,i+j-1] * price_mul.iloc[:,i+j]
# Increment i by the holding period before next iteration
i += holding
# Since we started from i=1, the first element of the subset is going to be 0, so we are going to remove it
# Build output dataframe
portf = pd.DataFrame(columns=time[1:])
# Insert value information
portf.loc['value'] = sub.sum()[1:]
# Insert return information
portf.loc['returns'] = (portf.loc['value'].astype('float') - portf.loc['value'].shift(1).astype('float'))/portf.loc['value'].shift(1).astype('float')
portf.loc['cum_returns'] = (portf.loc['value'].astype('float') - portf.loc['value'][0])/portf.loc['value'][0]
portf.loc['log_returns'] = np.log(portf.loc['value'].astype('float')) - np.log(portf.loc['value'].shift(1).astype('float'))
return portf
The first portfolio is built using the 3 factors corresponding to the 3 best univariate portfolios.
portf_sim_top3 = simPortfolio(['PE_RATIO','CURRENT_EV_TO_12M_SALES','FIVE_YEAR_AVG_EV_TO_T12_EBITDA'], 12)
portf_sim_top3
Date | 2003-02-28 | 2003-03-31 | 2003-04-30 | 2003-05-30 | 2003-06-30 | 2003-07-31 | 2003-08-29 | 2003-09-30 | 2003-10-31 | 2003-11-28 | ... | 2011-06-30 | 2011-07-29 | 2011-08-31 | 2011-09-30 | 2011-10-31 | 2011-11-30 | 2011-12-30 | 2012-01-31 | 2012-02-29 | 2012-03-30 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
value | 598800.0 | 617508.808839 | 706541.893857 | 710354.620817 | 756534.472363 | 848790.42887 | 884299.704912 | 880158.792739 | 968782.255865 | 1016708.20419 | ... | 3959043.090802 | 3900238.491907 | 3510312.977577 | 3278671.988075 | 3390207.807767 | 3244822.484614 | 3303015.975812 | 3384307.127542 | 3671914.783331 | 3826073.925732 |
returns | NaN | 0.031244 | 0.144181 | 0.005396 | 0.06501 | 0.121945 | 0.041835 | -0.004683 | 0.10069 | 0.04947 | ... | -0.063493 | -0.014853 | -0.099975 | -0.065989 | 0.034019 | -0.042884 | 0.017934 | 0.024611 | 0.084983 | 0.041983 |
cum_returns | 0.0 | 0.031244 | 0.17993 | 0.186297 | 0.263418 | 0.417486 | 0.476786 | 0.469871 | 0.617873 | 0.697909 | ... | 5.611628 | 5.513424 | 4.862246 | 4.475404 | 4.66167 | 4.418875 | 4.516059 | 4.651816 | 5.132122 | 5.389569 |
log_returns | NaN | 0.030766 | 0.134689 | 0.005382 | 0.062984 | 0.115064 | 0.040984 | -0.004694 | 0.095938 | 0.048286 | ... | -0.065598 | -0.014965 | -0.105333 | -0.068267 | 0.033453 | -0.043831 | 0.017775 | 0.024313 | 0.081564 | 0.041126 |
4 rows × 110 columns
The last portfolio is built using the 6 factors corresponding to the best 6 best univariate portfolios.
portf_sim_top6 = simPortfolio(['PE_RATIO','CURRENT_EV_TO_12M_SALES','FIVE_YEAR_AVG_EV_TO_T12_EBITDA',
'CURRENT_EV_TO_T12M_EBITDA', 'PX_TO_BOOK_RATIO', 'NORMALIZED_ACCRUALS_CF_METHOD'], 12)
portf_sim_top6
Date | 2003-02-28 | 2003-03-31 | 2003-04-30 | 2003-05-30 | 2003-06-30 | 2003-07-31 | 2003-08-29 | 2003-09-30 | 2003-10-31 | 2003-11-28 | ... | 2011-06-30 | 2011-07-29 | 2011-08-31 | 2011-09-30 | 2011-10-31 | 2011-11-30 | 2011-12-30 | 2012-01-31 | 2012-02-29 | 2012-03-30 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
value | 598800.0 | 596411.815668 | 686730.452715 | 695754.610035 | 774412.47938 | 862734.482066 | 1039841.880571 | 999939.47258 | 1186294.118391 | 1175397.848545 | ... | 13343887.837536 | 12947041.015565 | 11874416.913412 | 11222909.600969 | 11364509.042881 | 10892290.292138 | 11110460.672337 | 11517024.695496 | 12198350.325132 | 12524348.917149 |
returns | NaN | -0.003988 | 0.151437 | 0.013141 | 0.113054 | 0.11405 | 0.205286 | -0.038374 | 0.186366 | -0.009185 | ... | -0.056139 | -0.02974 | -0.082847 | -0.054866 | 0.012617 | -0.041552 | 0.02003 | 0.036593 | 0.059158 | 0.026725 |
cum_returns | 0.0 | -0.003988 | 0.146844 | 0.161915 | 0.293274 | 0.440772 | 0.736543 | 0.669906 | 0.981119 | 0.962922 | ... | 21.284382 | 20.621645 | 18.830356 | 17.742334 | 17.978806 | 17.190198 | 17.554544 | 18.233508 | 19.371327 | 19.915746 |
log_returns | NaN | -0.003996 | 0.14101 | 0.013055 | 0.107108 | 0.108002 | 0.186717 | -0.039129 | 0.170895 | -0.009228 | ... | -0.057776 | -0.030191 | -0.086481 | -0.056429 | 0.012538 | -0.04244 | 0.019832 | 0.035939 | 0.057474 | 0.026374 |
4 rows × 110 columns
plt.figure(figsize =(10, 6))
uni_pe.loc['value'].plot(label = 'PE_RATIO', color='C0', linestyle = 'dashed')
portf_seq_top3.loc['value'].plot(label = 'Sequential Top3', color='C1', linestyle = 'dashed')
portf_sim_top3.loc['value'].plot(label = "Simultaneous Top3", color='C2')
portf_sim_top6.loc['value'].plot(label = 'Simultaneous Top6', color='C3')
benchmark.loc['value'].plot(label = "benchmark", color='black')
plt.grid()
plt.ylabel("Value (in millions)")
plt.text(508, uni_pe.loc['value'][-1], 'IR: ' + str(round(IR(uni_pe),2)), color='C0')
plt.text(508, portf_seq_top3.loc['value'][-1], 'IR: ' + str(round(IR(portf_seq_top3),2)), color='C1')
plt.text(508, portf_sim_top3.loc['value'][-1], 'IR: ' + str(round(IR(portf_sim_top3),2)), color='C2')
plt.text(508, portf_sim_top6.loc['value'][-1], 'IR: ' + str(round(IR(portf_sim_top6),2)), color='C3')
plt.title("Comparison between univariate, sequential and simultaneous portfolios.")
plt.ylabel("value (in tens of millions)")
plt.legend()
plt.show()
While the simultaneous top 3 portfolio didn't achieve great returns, the simultaneous top 6 portfolio outperformed all the other strategies. The higher performances are linked with more risk, and this is reflected in the Information Ratio, that is still lower than the univariate portfolio built with the PE ratio.
In this section we are going to introduce a methodology that aims at resolving the factor correlation problem in screening models. To do so, we are going to measure correlation between the portfolios resulting from the univariate screening models. At each step we are going to select the factor that achieves the best Information Ratio among the ones that are correlated with the already selected factor under a certain level (in thi case set to ).
rank.sort_values(by='Information', ascending=False).iloc[1]
Information 1.268581 Sharpe 0.775805 Sortino 0.982374 Name: PE_RATIO, dtype: object
data_univariate = []
for factor in tot_factors:
data_univariate.append(list(uniPortfolio(get_factor(factor),12).loc['value']))
df_corr1 = np.corrcoef(data_univariate)
plt.imshow(df_corr1, cmap = 'RdBu_r', interpolation='nearest', vmax=1, vmin=0)
# Add legend
plt.colorbar()
labels = tot_factors
# Create list with tick_mark positions
tick_marks = [i for i in range(len(tot_factors))]
# Add the tickmarks at the designated position, using the labels of the dataframe
plt.xticks(tick_marks, labels, rotation='vertical', fontsize=9)
plt.yticks(tick_marks, labels, fontsize=9)
plt.show()
df_corr1 = pd.DataFrame(df_corr1, columns = tot_factors, index = tot_factors)
uncorr_factors = df_corr1.loc[:,'PE_RATIO'][df_corr1.loc[:,'PE_RATIO'] < 0.85]
uncorr_factors
CURRENT_EV_TO_12M_SALES 0.827129 TRAIL_12M_SALES_PER_SH 0.801906 TANG_BOOK_VAL_PER_SH 0.811133 EBITDA_MARGIN_3YR_AVG 0.771817 RSI_14D 0.750167 RSI_30D 0.823930 RSI_9D 0.687097 Name: PE_RATIO, dtype: float64
rank.loc[uncorr_factors.index].sort_values(by='Information', ascending=False)
Information | Sharpe | Sortino | |
---|---|---|---|
CURRENT_EV_TO_12M_SALES | 1.190999 | 0.694106 | 0.894214 |
EBITDA_MARGIN_3YR_AVG | 0.668972 | 0.583426 | 0.969576 |
TRAIL_12M_SALES_PER_SH | 0.597328 | 0.588323 | 0.883077 |
RSI_9D | 0.532972 | 0.850518 | 1.085326 |
RSI_30D | 0.453949 | 0.516281 | 0.768311 |
RSI_14D | 0.306011 | 0.482653 | 0.85147 |
TANG_BOOK_VAL_PER_SH | -0.93403 | 0.45706 | 0.514472 |
uncorr_factors = df_corr1.loc[uncorr_factors.index].loc[:,'CURRENT_EV_TO_12M_SALES'][df_corr1.loc[:,'CURRENT_EV_TO_12M_SALES'] < 0.85]
uncorr_factors
TRAIL_12M_SALES_PER_SH 0.829442 EBITDA_MARGIN_3YR_AVG 0.708955 Name: CURRENT_EV_TO_12M_SALES, dtype: float64
rank.loc[uncorr_factors.index].sort_values(by='Information', ascending=False)
Information | Sharpe | Sortino | |
---|---|---|---|
EBITDA_MARGIN_3YR_AVG | 0.668972 | 0.583426 | 0.969576 |
TRAIL_12M_SALES_PER_SH | 0.597328 | 0.588323 | 0.883077 |
plt.imshow(df_corr1.loc[['PE_RATIO', 'CURRENT_EV_TO_12M_SALES', 'EBITDA_MARGIN_3YR_AVG'], ['PE_RATIO', 'CURRENT_EV_TO_12M_SALES', 'EBITDA_MARGIN_3YR_AVG']],
cmap = 'RdBu_r', interpolation='nearest', vmax=1, vmin=0)
# Add legend
plt.colorbar()
labels = ['PE_RATIO', 'CURRENT_EV_TO_12M_SALES', 'EBITDA_MARGIN_3YR_AVG']
# Create list with tick_mark positions
tick_marks = [i for i in range(3)]
# Add the tickmarks at the designated position, using the labels of the dataframe
plt.xticks(tick_marks, labels, rotation='vertical', fontsize=9)
plt.yticks(tick_marks, labels, fontsize=9)
plt.text(0, 1, '0.827', horizontalalignment="center", color='white')
plt.text(0, 2, '0.772', horizontalalignment="center", color='white')
plt.text(1, 2, '0.709', horizontalalignment="center", color='white')
plt.show()
portf_uncorr_seq1 = seqPortfolio(['PE_RATIO', 'CURRENT_EV_TO_12M_SALES', 'EBITDA_MARGIN_3YR_AVG'], 12)
portf_uncorr_seq_inv1 = seqPortfolio(['EBITDA_MARGIN_3YR_AVG','CURRENT_EV_TO_12M_SALES', 'PE_RATIO'], 12)
portf_uncorr_sim1 = simPortfolio(['PE_RATIO', 'CURRENT_EV_TO_12M_SALES', 'EBITDA_MARGIN_3YR_AVG'], 12)
In this second strategy, instead of building the portfolios evaluating the correlation between the univariate portfolios built with each factor, we compute the correlation between each factor itself.
corr_list = []
# For every stock, build a correlation matrix of the time series of its factor, and append it to the corr_list
for stock in stocks:
ind = df.index.get_loc(stock + ' Equity')
corr_list.append(df.iloc[ind+2:ind+42].transpose().astype('float64').corr())
# Calculate the df_corr2 as the average correlation matrix between all the correlation matrixes in the list
df_corr2 = pd.concat(corr_list)
df_corr2 = df_corr2.groupby(level=0).mean()
df_corr2 = df_corr2[list(df_corr2.index)]
df_corr2 = df_corr2.loc[tot_factors,tot_factors]
plt.imshow(df_corr2, cmap = 'RdBu_r', interpolation='nearest', vmax=1, vmin=-1)
# Add legend
plt.colorbar()
labels = tot_factors
# Create list with tick_mark positions
tick_marks = [i for i in range(len(tot_factors))]
# Add the tickmarks at the designated position, using the labels of the dataframe
plt.xticks(tick_marks, labels, rotation='vertical', fontsize=9)
plt.yticks(tick_marks, labels, fontsize=9)
plt.show()
uncorr_factors = df_corr2.loc[:,'PE_RATIO'][df_corr2.loc[:,'PE_RATIO'] < 0.2]
uncorr_factors
0 FIVE_YEAR_AVG_EV_TO_T12_EBITDA 0.122001 TRAIL_12M_EBITDA_PER_SHARE -0.331919 TRAIL_12M_SALES_PER_SH -0.190967 NET_DEBT_PER_SHARE 0.020325 TANG_BOOK_VAL_PER_SH -0.069882 NORMALIZED_ACCRUALS_CF_METHOD -0.137099 EBITDA_MARGIN -0.281752 EBITDA_MARGIN_3YR_AVG -0.105860 RSI_30D 0.162066 OPERATING_ROIC -0.299678 EQY_DPS_NET_5YR_GROWTH -0.110507 EQY_REC_CONS -0.067301 WACC_COST_EQUITY -0.051332 NORMALIZED_ROE -0.428298 5YR_AVG_RETURN_ON_EQUITY -0.251224 CUR_MKT_CAP 0.138047 Name: PE_RATIO, dtype: float64
rank.loc[uncorr_factors.index].sort_values(by='Information', ascending=False)
Information | Sharpe | Sortino | |
---|---|---|---|
0 | |||
CUR_MKT_CAP | 2.572272 | 0.964028 | 1.774545 |
FIVE_YEAR_AVG_EV_TO_T12_EBITDA | 1.158677 | 0.756905 | 0.924165 |
NORMALIZED_ACCRUALS_CF_METHOD | 0.887866 | 0.667879 | 1.106075 |
5YR_AVG_RETURN_ON_EQUITY | 0.852702 | 0.560658 | 0.861664 |
TRAIL_12M_EBITDA_PER_SHARE | 0.735485 | 0.576269 | 1.07594 |
EBITDA_MARGIN_3YR_AVG | 0.668972 | 0.583426 | 0.969576 |
TRAIL_12M_SALES_PER_SH | 0.597328 | 0.588323 | 0.883077 |
EBITDA_MARGIN | 0.578179 | 0.545594 | 1.023334 |
RSI_30D | 0.453949 | 0.516281 | 0.768311 |
EQY_DPS_NET_5YR_GROWTH | 0.388422 | 0.704911 | 1.085478 |
NET_DEBT_PER_SHARE | 0.142135 | 0.846456 | 1.029318 |
WACC_COST_EQUITY | -0.047826 | 0.889192 | 1.347257 |
OPERATING_ROIC | -0.212811 | 0.722881 | 0.958099 |
EQY_REC_CONS | -0.225417 | 0.702182 | 0.759324 |
NORMALIZED_ROE | -0.287531 | 0.62834 | 0.76751 |
TANG_BOOK_VAL_PER_SH | -0.93403 | 0.45706 | 0.514472 |
uncorr_factors = df_corr2.loc[uncorr_factors.index].loc[:,'FIVE_YEAR_AVG_EV_TO_T12_EBITDA'][df_corr2.loc[:,'FIVE_YEAR_AVG_EV_TO_T12_EBITDA'] < 0.2]
uncorr_factors
0 TRAIL_12M_EBITDA_PER_SHARE -0.085002 TRAIL_12M_SALES_PER_SH -0.027029 NET_DEBT_PER_SHARE 0.118059 TANG_BOOK_VAL_PER_SH -0.085059 NORMALIZED_ACCRUALS_CF_METHOD 0.063244 EBITDA_MARGIN -0.094127 EBITDA_MARGIN_3YR_AVG -0.113233 RSI_30D 0.004878 OPERATING_ROIC -0.040279 EQY_DPS_NET_5YR_GROWTH 0.023646 EQY_REC_CONS 0.034376 WACC_COST_EQUITY 0.026049 NORMALIZED_ROE 0.029498 5YR_AVG_RETURN_ON_EQUITY -0.017209 CUR_MKT_CAP 0.169720 Name: FIVE_YEAR_AVG_EV_TO_T12_EBITDA, dtype: float64
rank.loc[uncorr_factors.index].sort_values(by='Information', ascending=False)
Information | Sharpe | Sortino | |
---|---|---|---|
0 | |||
CUR_MKT_CAP | 2.572272 | 0.964028 | 1.774545 |
NORMALIZED_ACCRUALS_CF_METHOD | 0.887866 | 0.667879 | 1.106075 |
5YR_AVG_RETURN_ON_EQUITY | 0.852702 | 0.560658 | 0.861664 |
TRAIL_12M_EBITDA_PER_SHARE | 0.735485 | 0.576269 | 1.07594 |
EBITDA_MARGIN_3YR_AVG | 0.668972 | 0.583426 | 0.969576 |
TRAIL_12M_SALES_PER_SH | 0.597328 | 0.588323 | 0.883077 |
EBITDA_MARGIN | 0.578179 | 0.545594 | 1.023334 |
RSI_30D | 0.453949 | 0.516281 | 0.768311 |
EQY_DPS_NET_5YR_GROWTH | 0.388422 | 0.704911 | 1.085478 |
NET_DEBT_PER_SHARE | 0.142135 | 0.846456 | 1.029318 |
WACC_COST_EQUITY | -0.047826 | 0.889192 | 1.347257 |
OPERATING_ROIC | -0.212811 | 0.722881 | 0.958099 |
EQY_REC_CONS | -0.225417 | 0.702182 | 0.759324 |
NORMALIZED_ROE | -0.287531 | 0.62834 | 0.76751 |
TANG_BOOK_VAL_PER_SH | -0.93403 | 0.45706 | 0.514472 |
plt.imshow(df_corr2.loc[['PE_RATIO', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'NORMALIZED_ACCRUALS_CF_METHOD'], ['PE_RATIO', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'NORMALIZED_ACCRUALS_CF_METHOD']],
cmap = 'RdBu_r', interpolation='nearest', vmax=1, vmin=-1)
# Add legend
plt.colorbar()
labels = ['PE_RATIO', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'NORMALIZED_ACCRUALS_CF_METHOD']
# Create list with tick_mark positions
tick_marks = [i for i in range(3)]
# Add the tickmarks at the designated position, using the labels of the dataframe
plt.xticks(tick_marks, labels, rotation='vertical', fontsize=9)
plt.yticks(tick_marks, labels, fontsize=9)
plt.text(0, 1, '0.122', horizontalalignment="center")
plt.text(0, 2, '-0.137', horizontalalignment="center")
plt.text(1, 2, '0.162', horizontalalignment="center")
plt.show()
portf_uncorr_seq2 = seqPortfolio(['PE_RATIO', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'NORMALIZED_ACCRUALS_CF_METHOD'], 12)
portf_uncorr_seq_inv2 = seqPortfolio(['NORMALIZED_ACCRUALS_CF_METHOD', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'PE_RATIO'], 12)
portf_uncorr_sim2 = simPortfolio(['PE_RATIO', 'FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'NORMALIZED_ACCRUALS_CF_METHOD'], 12)
plt.figure(figsize =(10, 6))
portf_seq_top3.loc['value'].plot(label = 'Portf Seq top3', linestyle = 'dashed', color='C1')
portf_uncorr_seq1.loc['value'].plot(label = 'Portf Seq Uncorrelated 1', color='mediumblue')
portf_uncorr_seq_inv1.loc['value'].plot(label = 'Portf Seq Uncorrelated Inv 1', color='royalblue')
portf_uncorr_seq2.loc['value'].plot(label = 'Portf Seq Uncorrelated 2', color='darkgreen')
portf_uncorr_seq_inv2.loc['value'].plot(label = 'Portf Seq Uncorrelated Inv 2', color='limegreen')
benchmark.loc['value'].plot(label = "benchmark", color='black')
plt.grid()
plt.title("Comparison of Sequential Portfolios")
plt.ylabel("Value (in millions)")
plt.text(508, portf_seq_top3.loc['value'][-1], 'IR: ' + str(round(IR(portf_seq_top3),2)), color='C1')
plt.text(508, portf_uncorr_seq1.loc['value'][-1], 'IR: ' + str(round(IR(portf_uncorr_seq1),2)), color='mediumblue')
plt.text(508, portf_uncorr_seq_inv1.loc['value'][-1], 'IR: ' + str(round(IR(portf_uncorr_seq_inv1),2)), color='royalblue')
plt.text(508, portf_uncorr_seq2.loc['value'][-1] + 50000, 'IR: ' + str(round(IR(portf_uncorr_seq2),2)), color='darkgreen')
plt.text(508, portf_uncorr_seq_inv2.loc['value'][-1] - 50000, 'IR: ' + str(round(IR(portf_uncorr_seq_inv2),2)), color='limegreen')
plt.legend()
plt.show()
The sequential portfolio built using the "uncorrelated" factors with strategy 1 achieves lower performances and a low Information Ratio (0.31). The opposite happens for the inverted version, that outperforms the standard sequential screening model, achieving an Information Ratio of 1.13.
For what regards the second strategy, both portfolios achieve very good performance 0.99, buth none of them beat the original sequential portfolio.
plt.figure(figsize =(10, 6))
portf_sim_top6.loc['value'].plot(label = "Simultaneous Top6", color = 'C3', linestyle = 'dashed')
portf_uncorr_sim1.loc['value'].plot(label = "Simultaneous Uncorrelated 1", color = 'mediumblue')
portf_uncorr_sim2.loc['value'].plot(label = "Simultaneous Uncorrelated 2", color = 'darkgreen')
benchmark.loc['value'].plot(label = "benchmark", color='black')
plt.title("Comparison of Sequential Portfolios")
plt.ylabel("Value (in millions)")
plt.grid()
plt.text(508, portf_sim_top6.loc['value'][-1], 'IR: ' + str(round(IR(portf_sim_top6),2)), color='C3')
plt.text(508, portf_uncorr_sim1.loc['value'][-1], 'IR: ' + str(round(IR(portf_uncorr_sim1),2)), color='mediumblue')
plt.text(508, portf_uncorr_sim2.loc['value'][-1], 'IR: ' + str(round(IR(portf_uncorr_sim2),2)), color='darkgreen')
plt.legend()
plt.show()
The uncorrelated strategy 1 does not show to be as effective as for the sequential portfolios. This probably is due to an extreme correlation between the univariate portfolios.
Neither the uncorrelated strategy 2 does not achieve as high information ratio as in the sequential portfolios. Nonetheless, the IR is pretty good (0.76).
Until now, all the tried strategy were equally weighted. We now try to change the simultaneous function in order to build new portfolios with a weighted strategy.
To weight the assets inside our portfolios we are going to use the volatility of the assets themselves. More specifically, we weighted by the inverse of volatility. This was done to produce less risky portfolios, since less volatile assets are going to receive a greater weight.
def weightPortfolio(data, holding, weight, wealth = 600000, n_stocks=15):
# Initialize worst list
worst = []
for factor in range(len(data)):
worst.append(diz_factors[data[factor]])
# Initiate dataset
sub = pd.DataFrame(columns=time, index = stocks)
# Set i = 1, we want to start at month 2, so that we can use the information of the factor at month 1
i=1
# Remove the transaction cost in the first iteration of wealth
wealth = wealth * 0.998
# Initialize weight dataset
weight = get_factor(weight).replace(0, np.nan)**(-1)
# Loop through i. i is going to represent the month in which the selected stocks are modified based on the factor
while i < len(time):
# If we are not in the first month, we have to compute the portfolio value in the new month before reallocation
if i != 1:
portfT = sub.iloc[:,i-1] * price_mul.iloc[:,i]
# Overwrite the wealth as the portfolio total value
wealth = portfT.sum()
# Create a copy of the factors
fac = []
for el in range(len(data)):
fac.append(get_factor(data[el]).loc[price_mul.iloc[:,i].notnull(),:])
# Select stocks based on factor
sub_factor = pd.DataFrame(columns=range(len(data)+1), index = fac[0].index)
for el in range(len(data)):
new_col = scale(fac[el].iloc[:,i-1])
if worst[el]:
new_col = new_col * -1
sub_factor[el] = new_col
sub_factor.iloc[:,-1] = sub_factor.mean(axis=1)
sel_stocks = sub_factor.sort_values(sub_factor.columns[-1], ascending=False).index[:n_stocks]
# Get sum of the weights
tot_weight = weight.loc[sel_stocks, weight.columns[i-1]].sum()
sub_weight = weight.loc[sel_stocks, weight.columns[i-1]] / tot_weight
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth * sub_weight
# If we are not in the first month, than we have to compute the transaction costs
if i != 1:
# We compute the transaction costs based on the difference between the portfolio before and after reallocation
trans_cost = abs(sub.iloc[:,i] - portfT).sum() * 0.002
# Remove the transaction cost from total wealth
wealth = wealth - trans_cost
# Overwrite allocation of wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth * sub_weight
# J is going to loop through the months in which there is no reallocation
for j in range(1,holding):
# Break the j loop if the months are finished
if i + j > len(time)-1:
break
# Compute the value of all the stocks at time t as their value at time t-1 times theprice multiplicator at time t
sub.iloc[:,i+j] = sub.iloc[:,i+j-1] * price_mul.iloc[:,i+j]
# Increment i by the holding period before next iteration
i += holding
# Since we started from i=1, the first element of the subset is going to be 0, so we are going to remove it
# Build output dataframe
portf = pd.DataFrame(columns=time[1:])
# Insert value information
portf.loc['value'] = sub.sum()[1:]
# Insert return information
portf.loc['returns'] = (portf.loc['value'].astype('float') - portf.loc['value'].shift(1).astype('float'))/portf.loc['value'].shift(1).astype('float')
portf.loc['cum_returns'] = (portf.loc['value'].astype('float') - portf.loc['value'][0])/portf.loc['value'][0]
portf.loc['log_returns'] = np.log(portf.loc['value'].astype('float')) - np.log(portf.loc['value'].shift(1).astype('float'))
return portf
portf_weight_180 = weightPortfolio(['PE_RATIO','CURRENT_EV_TO_12M_SALES','FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'CURRENT_EV_TO_T12M_EBITDA', 'PX_TO_BOOK_RATIO', 'NORMALIZED_ACCRUALS_CF_METHOD'], 12, 'VOLATILITY_180D')
portf_weight_90 = weightPortfolio(['PE_RATIO','CURRENT_EV_TO_12M_SALES','FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'CURRENT_EV_TO_T12M_EBITDA', 'PX_TO_BOOK_RATIO', 'NORMALIZED_ACCRUALS_CF_METHOD'], 12, 'VOLATILITY_90D')
portf_weight_30 = weightPortfolio(['PE_RATIO','CURRENT_EV_TO_12M_SALES','FIVE_YEAR_AVG_EV_TO_T12_EBITDA', 'CURRENT_EV_TO_T12M_EBITDA', 'PX_TO_BOOK_RATIO', 'NORMALIZED_ACCRUALS_CF_METHOD'], 12, 'VOLATILITY_30D')
plt.figure(figsize =(10, 6))
portf_weight_180.loc['value'].plot(label = "Weighted 180D", color = 'C0')
portf_weight_90.loc['value'].plot(label = "Weighted 90D", color = 'C1')
portf_weight_30.loc['value'].plot(label = "Weighted 30D", color = 'C2')
portf_sim_top6.loc['value'].plot(label = "Simultaneous top6", color = 'C3', linestyle = 'dashed')
benchmark.loc['value'].plot(label = "Benchmark", color='black')
plt.title("Comparison between weighted and non-weighted simultaneous portfolios")
plt.text(508, portf_weight_180.loc['value'][-1], 'IR: ' + str(round(IR(portf_weight_180),2)), color='C0')
plt.text(508, portf_weight_90.loc['value'][-1], 'IR: ' + str(round(IR(portf_weight_90),2)), color='C1')
plt.text(508, portf_weight_30.loc['value'][-1], 'IR: ' + str(round(IR(portf_weight_30),2)), color='C2')
plt.text(508, portf_sim_top6.loc['value'][-1], 'IR: ' + str(round(IR(portf_sim_top6),2)), color='C3')
plt.grid()
plt.legend()
plt.ylabel("value (in tens millions)")
plt.show()
# Initialize an empty rank dataframe
rank_weight = pd.DataFrame(columns = ['Information', 'Sharpe', 'Sortino'],
index = ['portf_weight_180', 'portf_weight_90', 'portf_weight_30', 'portf_sim_top6'])
# For each portfolio insert in the dataframe its resulting metrics
port_list = [portf_weight_180, portf_weight_90, portf_weight_30, portf_sim_top6]
for i in range(4):
rank_weight.iloc[i]['Information'] = IR(port_list[i])
rank_weight.iloc[i]['Sharpe'] = Sharpe(port_list[i])
rank_weight.iloc[i]['Sortino'] = Sortino(port_list[i])
rank_weight
Information | Sharpe | Sortino | |
---|---|---|---|
portf_weight_180 | 1.022047 | 0.705464 | 0.969694 |
portf_weight_90 | 0.981541 | 0.698841 | 0.948778 |
portf_weight_30 | 1.053439 | 0.71037 | 1.028956 |
portf_sim_top6 | 1.138483 | 0.696275 | 1.12746 |
As expected, the weighted portfolio are less risky. Because of that, they achieve less returns, but the metrics remain very good.
This section propose an AI strategy to build new portfolios. We used a rolling version of the Prophet model (developed by Meta) to forecast future returns for each asset, based on past-year factors. The predicted returns were then used as a factor for an univariate screening strategy. In other words, each year the portfolio is built using the stock with the best predicted total return at the end of next year.
from prophet import Prophet
sel_factors = list(rank.index[0:5])
wealth = 600000
skip_count = 0
i = 12
sub = pd.DataFrame(columns=time, index = stocks)
while i < len(time) - 12:
# If we are not in the first month, we have to compute the portfolio value in the new month before reallocation
if i != 12:
portfT = sub.iloc[:,i-1] * price_mul.iloc[:,i]
# Overwrite the wealth as the portfolio total value
wealth = portfT.sum()
stock_rank = {}
# For each stock, build the prophet prediction
for stock in stocks:
# Get the stock data
ind = df.index.get_loc(stock + ' Equity')
df_stock = df.iloc[range(ind + 2, ind + 42)].transpose()
# Change the data shape and names in order to be used as input by Prophet
df_stock['ds'] = df.loc['Date'].iloc[0]
df_stock.index = range(len(df_stock))
df_stock = df_stock.rename(columns = {'PX_LAST':'y'})
# If the stock has no information about the value of the factors that are going to be used as explanatory variable, skip this stock
if df_stock[sel_factors].isnull().sum().sum() != 0:
skip_count += 1
continue
# The train dataset utilizes all the factor values available until before the past year
train = df_stock[:i]
# + the price in the subsequent year of the factors (until past year)
train['y'] = df_stock.loc[range(12,i+12),'y'].values.tolist()
# The test dataset utilizes all the factor values available in the past year
test = pd.DataFrame(df_stock[i:i+12])
m = Prophet()
# This loop add every selected factor as explanatory variable in the model
for factor in sel_factors:
m.add_regressor(factor)
# Train the data
m.fit(train)
# Build the prediction
prediction = m.predict(test)
# Compute the predicted return
stock_rank[stock] = (prediction.iloc[-1]['yhat'] - train.iloc[-1]['y']) / train.iloc[-1]['y']
# The selected stocks are going to be those with the best predicted return
sel_stocks = pd.DataFrame(sorted(stock_rank.items(), key=lambda x:x[1], reverse=True)[:15])[0]
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# If we are not in the first month, than we have to compute the transaction costs
if i != 12:
# We compute the transaction costs based on the difference between the portfolio before and after reallocation
trans_cost = abs(sub.iloc[:,i] - portfT).sum() * 0.002
# Remove the transaction cost from total wealth
wealth = wealth - trans_cost
# Overwrite allocation of wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# J is going to loop through the months in which there is no reallocation
for j in range(1,12):
# Break the j loop if the months are finished
if i + j > len(time)-1:
break
# Compute the value of all the stocks at time t as their value at time t-1 times theprice multiplicator at time t
sub.iloc[:,i+j] = sub.iloc[:,i+j-1] * price_mul.iloc[:,i+j]
# Increment i by the holding period before next iteration
i += 12
portf = pd.DataFrame(columns=time[12:])
# Insert value information
portf.loc['value'] = sub.sum()
# Insert return information
portf.loc['returns'] = (portf.loc['value'].astype('float') - portf.loc['value'].shift(1).astype('float'))/portf.loc['value'].shift(1).astype('float')
portf.loc['cum_returns'] = (portf.loc['value'].astype('float') - portf.loc['value'][0])/portf.loc['value'][0]
portf.loc['log_returns'] = np.log(portf.loc['value'].astype('float')) - np.log(portf.loc['value'].shift(1).astype('float'))
nan_factors = []
for factor_name in sel_factors:
factor = get_factor(factor_name)
print(factor_name, factor.isnull().any(axis=1).sum())
CUR_MKT_CAP 14 PE_RATIO 453 CURRENT_EV_TO_12M_SALES 415 FIVE_YEAR_AVG_EV_TO_T12_EBITDA 526 CURRENT_EV_TO_T12M_EBITDA 498
While trying this strategy, we found that many stocks were not considered in the prediction due to the presence of nan values in the factors.
prop_portf = portf
# Build a benchmark that start at the same month
sub = pd.DataFrame(columns=time, index = stocks)
i=13
# Remove the transaction cost in the first iteration of wealth
wealth = 600000 * 0.998
# To calculate the benchmark we select only the stocks that have no null information in the price df
sel_stocks = price[~price.isnull().any(axis=1)].index
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[12]] = wealth/len(sel_stocks)
# Calculate the value of the portfolio at each iteration of time
while i < len(time):
sub.iloc[:,i] = sub.iloc[:,i-1] * price_mul.iloc[:,i]
i += 1
# Build output dataframe
bench = pd.DataFrame(columns=time[12:])
# Insert value information
bench.loc['value'] = sub.sum()[1:]
# Insert return information
bench.loc['returns'] = (bench.loc['value'].astype('float') - bench.loc['value'].shift(1).astype('float'))/bench.loc['value'].shift(1).astype('float')
bench.loc['cum_returns'] = (bench.loc['value'].astype('float') - bench.loc['value'][0])/bench.loc['value'][0]
bench.loc['log_returns'] = np.log(bench.loc['value'].astype('float')) - np.log(bench.loc['value'].shift(1).astype('float'))
bench_mom = bench
plt.figure(figsize =(10, 6))
prop_portf_CURMKTCAP.loc['value'][:-12].plot(label = "Prophet CURMKTCAP", color = 'C0')
prop_portf.loc['value'][:-12].plot(label = "Prophet", color = 'C1')
bench_mom.loc['value'][:-12].plot(label = 'benchmark', color = 'black')
plt.title("Results of the Prophet Portfolio")
plt.ylabel("Value (in milions)")
plt.grid()
plt.legend()
plt.show()
The resulting performance are bad compared to the other portfolios built with the sequential and simultaneous strategies. This could be due to the fact that the model was not tuned and predicts badly the returns of the each stock in the next year. With a better tuning and data with less empty values, the new factor could be more robust and may provide better results.
The last strategy we developed uses momentum to select stocks at every holding period (3 months). The momentum is built using a cross-sectional approach, with 6-1 formation period.
wealth = 600000
i = 6
wealth = wealth * 0.998
sub = pd.DataFrame(columns=time, index = stocks)
while i < len(time):
# If we are not in the first month, we have to compute the portfolio value in the new month before reallocation
if i != 6:
portfT = sub.iloc[:,i-1] * price_mul.iloc[:,i]
# Overwrite the wealth as the portfolio total value
wealth = portfT.sum()
momentum_diz = {}
# For each stock, we compute its momentum
for stock in stocks:
# The momentum is computed as [price(time-6) - price(time-2)] / price(time-6)
momentum = (price.loc[stock,price.columns[i-2]] - price.loc[stock,price.columns[i-6]]) / price.loc[stock,price.columns[i-6]]
# We save the momentum in a dictionary
momentum_diz[stock] = momentum
# We select the 15 stocks with the best momentum
sel_stocks = pd.DataFrame(sorted(momentum_diz.items(), key=lambda x:x[1], reverse=True)[:15])[0]
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# If we are not in the first month, than we have to compute the transaction costs
if i != 6:
# We compute the transaction costs based on the difference between the portfolio before and after reallocation
trans_cost = abs(sub.iloc[:,i] - portfT).sum() * 0.002
# Remove the transaction cost from total wealth
wealth = wealth - trans_cost
# Overwrite allocation of wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[i]] = wealth/len(sel_stocks)
# J is going to loop through the months in which there is no reallocation
for j in range(1,3):
# Break the j loop if the months are finished
if i + j > len(time)-1:
break
# Compute the value of all the stocks at time t as their value at time t-1 times theprice multiplicator at time t
sub.iloc[:,i+j] = sub.iloc[:,i+j-1] * price_mul.iloc[:,i+j]
# Increment i by the holding period before next iteration
i += 3
portf = pd.DataFrame(columns=time[6:])
# Insert value information
portf.loc['value'] = sub.sum()
# Insert return information
portf.loc['returns'] = (portf.loc['value'].astype('float') - portf.loc['value'].shift(1).astype('float'))/portf.loc['value'].shift(1).astype('float')
portf.loc['cum_returns'] = (portf.loc['value'].astype('float') - portf.loc['value'][0])/portf.loc['value'][0]
portf.loc['log_returns'] = np.log(portf.loc['value'].astype('float')) - np.log(portf.loc['value'].shift(1).astype('float'))
momentum_portf = portf
Since our momentum portfolio starts at time 7 (since it has to use the momentum computed on the past 6 months), we compare it to a benchmark that starts at the same time.
# Build a benchmark that start at the same month
sub = pd.DataFrame(columns=time, index = stocks)
i=7
# Remove the transaction cost in the first iteration of wealth
wealth = 600000 * 0.998
# To calculate the benchmark we select only the stocks that have no null information in the price df
sel_stocks = price[~price.isnull().any(axis=1)].index
# Allocate wealth equally through the selected stocks
sub.loc[sel_stocks, sub.columns[6]] = wealth/len(sel_stocks)
# Calculate the value of the portfolio at each iteration of time
while i < len(time):
sub.iloc[:,i] = sub.iloc[:,i-1] * price_mul.iloc[:,i]
i += 1
# Build output dataframe
bench = pd.DataFrame(columns=time[6:])
# Insert value information
bench.loc['value'] = sub.sum()[1:]
# Insert return information
bench.loc['returns'] = (bench.loc['value'].astype('float') - bench.loc['value'].shift(1).astype('float'))/bench.loc['value'].shift(1).astype('float')
bench.loc['cum_returns'] = (bench.loc['value'].astype('float') - bench.loc['value'][0])/bench.loc['value'][0]
bench.loc['log_returns'] = np.log(bench.loc['value'].astype('float')) - np.log(bench.loc['value'].shift(1).astype('float'))
bench_mom = bench
plt.figure(figsize =(10, 6))
momentum_portf.loc['value'].plot(label = 'momentum', color='C1')
bench_mom.loc['value'].plot(label = 'benchmark', color='black')
plt.text(508, momentum_portf.loc['value'][-1], 'IR: ' + str(round(IR(momentum_portf),2)), color='C1')
plt.grid()
plt.title('Comparison of Momentum Portfolio with Benchmark')
plt.ylabel('value (in tens of milions)')
plt.legend()
plt.show()
The portfolio built using the momentum strategy achieves a good Information Ratio (0.98) but not as high as other approaches.